##  [1] "codeindyr"    "code"         "countryname"  "year"         "indicator"   
##  [6] "estimate"     "stddev"       "nsource"      "pctrank"      "pctranklower"
## [11] "pctrankupper"
## # A tibble: 6 × 5
##   countryname    code   year wgi_cc               wgi_ge              
##   <chr>          <chr> <dbl> <chr>                <chr>               
## 1 Afghanistan    AFG    2022 -1.1836843490600586  -1.8800348043441772 
## 2 Albania        ALB    2022 -0.40818944573402405 6.454053521156311E-2
## 3 Algeria        DZA    2022 -0.63804107904434204 -0.51349949836730957
## 4 American Samoa ASM    2022 1.2702723741531372   0.66762912273406982 
## 5 Andorra        ADO    2022 1.2702723741531372   1.4952607154846191  
## 6 Angola         AGO    2022 -0.61237573623657227 -1.0260342359542847
## [1] "Country Name"  "Country Code"  "Series Name"   "Series Code"  
## [5] "2022 [YR2022]"
##     Country Name Country Code
## 1    Afghanistan          AFG
## 2    Afghanistan          AFG
## 3    Afghanistan          AFG
## 4    Afghanistan          AFG
## 5 American Samoa          ASM
## 6 American Samoa          ASM
##                                                                  Series Name
## 1 Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)
## 2                                         GDP per capita (constant 2015 US$)
## 3                               Energy use (kg of oil equivalent per capita)
## 4                                   Urban population (% of total population)
## 5 Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)
## 6                                         GDP per capita (constant 2015 US$)
##            Series Code         2022 [YR2022]
## 1 EN.GHG.CO2.PC.CE.AR5   0.20355189041619276
## 2       NY.GDP.PCAP.KD    377.66562708070467
## 3    EG.USE.PCAP.KG.OE                    ..
## 4    SP.URB.TOTL.IN.ZS                26.616
## 5 EN.GHG.CO2.PC.CE.AR5 0.0020685945968309132
## 6       NY.GDP.PCAP.KD    13709.097489547281
## 'data.frame':    865 obs. of  5 variables:
##  $ Country Name : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Country Code : chr  "AFG" "AFG" "AFG" "AFG" ...
##  $ Series Name  : chr  "Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)" "GDP per capita (constant 2015 US$)" "Energy use (kg of oil equivalent per capita)" "Urban population (% of total population)" ...
##  $ Series Code  : chr  "EN.GHG.CO2.PC.CE.AR5" "NY.GDP.PCAP.KD" "EG.USE.PCAP.KG.OE" "SP.URB.TOTL.IN.ZS" ...
##  $ 2022 [YR2022]: chr  "0.20355189041619276" "377.66562708070467" ".." "26.616" ...
## # A tibble: 6 × 6
##   country_name        country_code carbon_dioxide_co2_e…¹ gdp_per_capita_const…²
##   <chr>               <chr>        <chr>                  <chr>                 
## 1 Afghanistan         AFG          0.20355189041619276    377.66562708070467    
## 2 American Samoa      ASM          0.0020685945968309132  13709.097489547281    
## 3 Antigua and Barbuda ATG          3.3013787160706594     17456.872224310424    
## 4 Aruba               ABW          4.6845587550088537     29917.128028704523    
## 5 Azerbaijan          AZE          3.8875713436608024     5599.5852510053855    
## 6 Bangladesh          BGD          0.73252044425188623    1804.3493461776036    
## # ℹ abbreviated names:
## #   ¹​carbon_dioxide_co2_emissions_excluding_lulucf_per_capita_t_co2e_capita,
## #   ²​gdp_per_capita_constant_2015_us
## # ℹ 2 more variables: energy_use_kg_of_oil_equivalent_per_capita <chr>,
## #   urban_population_percent_of_total_population <chr>
## # A tibble: 6 × 6
##   country             code  co2_pc                PBI_pc     energy_pc urban_pop
##   <chr>               <chr> <chr>                 <chr>      <chr>     <chr>    
## 1 Afghanistan         AFG   0.20355189041619276   377.66562… ..        26.616   
## 2 American Samoa      ASM   0.0020685945968309132 13709.097… ..        87.196   
## 3 Antigua and Barbuda ATG   3.3013787160706594    17456.872… ..        24.346   
## 4 Aruba               ABW   4.6845587550088537    29917.128… ..        44.052   
## 5 Azerbaijan          AZE   3.8875713436608024    5599.5852… 1714.037… 57.17    
## 6 Bangladesh          BGD   0.73252044425188623   1804.3493… 297.1183… 39.711
## tibble [215 × 6] (S3: tbl_df/tbl/data.frame)
##  $ country  : chr [1:215] "Afghanistan" "American Samoa" "Antigua and Barbuda" "Aruba" ...
##  $ code     : chr [1:215] "AFG" "ASM" "ATG" "ABW" ...
##  $ co2_pc   : chr [1:215] "0.20355189041619276" "0.0020685945968309132" "3.3013787160706594" "4.6845587550088537" ...
##  $ PBI_pc   : chr [1:215] "377.66562708070467" "13709.097489547281" "17456.872224310424" "29917.128028704523" ...
##  $ energy_pc: chr [1:215] ".." ".." ".." ".." ...
##  $ urban_pop: chr [1:215] "26.616" "87.196" "24.346" "44.052" ...
## tibble [215 × 6] (S3: tbl_df/tbl/data.frame)
##  $ country  : chr [1:215] "Afghanistan" "American Samoa" "Antigua and Barbuda" "Aruba" ...
##  $ code     : chr [1:215] "AFG" "ASM" "ATG" "ABW" ...
##  $ co2_pc   : num [1:215] 0.20355 0.00207 3.30138 4.68456 3.88757 ...
##  $ PBI_pc   : num [1:215] 378 13709 17457 29917 5600 ...
##  $ energy_pc: num [1:215] NA NA NA NA 1714 ...
##  $ urban_pop: num [1:215] 26.6 87.2 24.3 44.1 57.2 ...
##   country      code    co2_pc    PBI_pc energy_pc urban_pop 
##         0         0        14        10        67         2
## Rows: 136
## Columns: 10
## $ country     <chr> "Azerbaijan", "Bangladesh", "Belgium", "Bosnia and Herzego…
## $ code        <chr> "AZE", "BGD", "BEL", "BIH", "BFA", "KHM", "ALB", "ARG", "A…
## $ co2_pc      <dbl> 3.8875713, 0.7325204, 7.6830211, 7.0870837, 0.2729526, 1.0…
## $ PBI_pc      <dbl> 5599.5853, 1804.3493, 44596.7860, 6327.0629, 739.3630, 201…
## $ energy_pc   <dbl> 1714.0373, 297.1184, 4368.4456, 2270.7264, 312.9291, 501.6…
## $ urban_pop   <dbl> 57.170, 39.711, 98.153, 49.841, 31.877, 25.114, 63.799, 92…
## $ countryname <chr> "Azerbaijan", "Bangladesh", "Belgium", "Bosnia and Herzego…
## $ year        <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022…
## $ wgi_cc      <dbl> -1.04090738, -1.07575178, 1.49497354, -0.68431562, -0.1116…
## $ wgi_ge      <dbl> -0.04120710, -0.76335531, 1.22718644, -1.06496072, -0.8531…
## [1] 136

Mapa

world <- world %>%
  dplyr::mutate(
    iso3 = countrycode::countrycode(country,
                                    origin = "country.name",
                                    destination = "iso3c")
  )
world_map <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf"
)
map_data <- dplyr::left_join(
  world_map,
  world,
  by = c("iso_a3" = "iso3")
)
fig <- plotly::plot_ly(
  data = world_plot,
  type = "choropleth",
  locations = ~code,
  locationmode = "ISO-3",
  z = ~co2_log,
  text = ~paste(
    "<b>País:</b>", country,
    "<br><b>CO₂ per cápita:</b>", round(co2_pc, 2), "t"
  ),
  hoverinfo = "text",
  colorscale = "Blues",
  reversescale = TRUE,
  marker = list(
    line = list(color = "gray20", width = 0.3)
  ),
  colorbar = list(
    title = "CO₂ per cápita (t)",
    tickvals = log10(c(0.1, 1, 5, 10, 20)),
    ticktext = c("0.1", "1", "5", "10", "20"),
    x = 1.03,   
    y = 0.75      
  )
) %>%
  plotly::layout(
    title = list(
      text = "<b>Mapa mundial de calor de emisiones CO₂ per cápita del año 2022</b>",
      x = 0.5, y = 0.97,
      font = list(size = 22)
    ),

    annotations = list(

 
      list(
        text = "Valores representados en escala logarítmica para facilitar comparación global",
        x = 0.5, y = 0.91,
        xref = "paper", yref = "paper",
        showarrow = FALSE,
        font = list(size = 16, color = "gray30")
      ),

      list(
        text = "Fuente: World Development Indicators (2023) | CO₂ per cápita (t CO₂e/persona)",
        x = 0.5, y = -0.1,
        xref = "paper", yref = "paper",
        showarrow = FALSE,
        font = list(size = 11, color = "gray40")
      )
    ),

    geo = list(
      showframe = FALSE,
      projection = list(type = "robinson"),
      showcoastlines = TRUE,
      coastlinecolor = "gray40",
      showcountries = TRUE,
      countrycolor = "gray50"
    )
  )

fig
##  [1] "country"     "code"        "co2_pc"      "PBI_pc"      "energy_pc"  
##  [6] "urban_pop"   "countryname" "year"        "wgi_cc"      "wgi_ge"     
## [11] "iso3"
descriptivos <- world %>%
  summarise(
    media_ge = mean(wgi_ge),
    sd_ge = sd(wgi_ge),
    media_co2 = mean(co2_pc),
    sd_co2 = sd(co2_pc),
    media_pbi = mean(PBI_pc),
    sd_pbi = sd(PBI_pc),
    media_energy = mean(energy_pc),
    sd_energy = sd(energy_pc),
    media_urban = mean(urban_pop),
    sd_urban = sd(urban_pop)
  ) %>%
  pivot_longer(everything(),
               names_to = "Medida",
               values_to = "Valor")

descriptivos %>%
  gt() %>%
  tab_header(
    title = "Estadísticos Descriptivos de las Variables del Estudio"
  ) %>%
  fmt_number(columns = Valor, decimals = 3)
Estadísticos Descriptivos de las Variables del Estudio
Medida Valor
media_ge 0.080
sd_ge 0.967
media_co2 5.182
sd_co2 6.125
media_pbi 17,154.785
sd_pbi 21,822.907
media_energy 2,392.676
sd_energy 2,776.515
media_urban 64.898
sd_urban 21.280

Graficos

ggplot(world, aes(x = wgi_ge)) +
  geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
  scale_fill_viridis(option = "E") +
  labs(
    title = "Distribución del Indicador de Efectividad del Gobierno (WGI)",
    x = "WGI - Efectividad del Gobierno",
    y = "Frecuencia"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(world, aes(x = co2_pc)) +
  geom_histogram(binwidth = 0.2, fill = "#27AE60", color = "white") +
  scale_x_log10(
    labels = label_comma(),
    breaks = c(0.1, 0.5, 1, 2, 5, 10, 20)
  ) +
  labs(
    title = "Distribución de las Emisiones de CO2 per cápita (escala logarítmica)",
    x = "CO₂ per cápita (toneladas, escala log)",
    y = "Frecuencia (número de países)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold")
  )

ggplot(world, aes(x = PBI_pc)) +
  geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
  scale_fill_viridis(option = "C", direction = -1) +
  scale_x_log10(labels = comma_format()) +
  labs(
    title = "Distribución del PBI per cápita (escala logarítmica)",
    x = "PBI per cápita (US$ constantes 2015, escala log)",
    y = "Frecuencia",
    fill = "Frecuencia"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    panel.grid.minor = element_blank()
  )

ggplot(world, aes(x = energy_pc)) +
  geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
  scale_fill_viridis(option = "D", direction = 1) +
  scale_x_log10(labels = comma_format()) +
  labs(
    title = "Distribución del Uso de Energía per cápita (escala logarítmica)",
    x = "Energía (kg de petróleo equivalente per cápita, escala log)",
    y = "Frecuencia",
    fill = "Frecuencia"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    panel.grid.minor = element_blank()
  )

ggplot(world, aes(x = urban_pop)) +
  geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
  scale_fill_viridis(option = "B") +
  labs(
    title = "Distribución del Porcentaje de Población Urbana",
    x = "% población urbana",
    y = "Frecuencia"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

ggplot(world, aes(x = wgi_cc)) +
  geom_histogram(bins = 30, aes(fill = ..count..), color = "white") +
  scale_fill_viridis(option = "A") +
  labs(
    title = "Distribución del Indicador de Control de Corrupción (WGI)",
    x = "WGI - Control de Corrupción",
    y = "Frecuencia"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "none")

Analisis Bivariado

En base a lo anteriormente visto se usarán las variables pertinentes como variables logaritmicas

world <- world %>% 
  filter(co2_pc > 0,
         PBI_pc > 0,
         energy_pc > 0) %>% 
  mutate(
    co2_pc   = log(co2_pc),
    PBI_pc   = log(PBI_pc),
    energy_pc = log(energy_pc)
  )

Co2 vs PBI_pc

cor_PBI <- stats::cor(world$co2_pc, world$PBI_pc)
cor_PBI
## [1] 0.8311698
tabla_cor_PBI <- dplyr::tibble(
  variable_dependiente   = "co2_pc",
  variable_independiente = "PBI_pc",
  correlacion_pearson    = cor_PBI
)
cor_PBI <- stats::cor(world$co2_pc, world$PBI_pc)

tabla_co2_pbi <- dplyr::tibble(
  variable_dependiente   = "CO2 per cápita",
  variable_independiente = "PBI per cápita",
  correlacion_pearson    = cor_PBI
)

gt::gt(tabla_co2_pbi) %>%
  gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
  gt::tab_header(title = "Correlación: CO₂ vs PBI per cápita")
Correlación: CO₂ vs PBI per cápita
variable_dependiente variable_independiente correlacion_pearson
CO2 per cápita PBI per cápita 0.831

gráfico 1.2

g_PBI <- ggplot2::ggplot(world, ggplot2::aes(x = PBI_pc, y = co2_pc)) +
  ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "D")) +
  ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
  ggplot2::scale_x_continuous(labels = scales::label_comma()) +
  ggplot2::scale_y_continuous(labels = scales::label_comma()) +
  ggplot2::labs(
    title = "CO₂ per cápita vs PBI per cápita",
    x = "PBI per cápita (US$ constantes 2015)",
    y = "CO₂ per cápita (t CO₂e/cápita)"
  ) +
  ggplot2::theme_minimal(base_size = 14)

g_PBI
## `geom_smooth()` using formula = 'y ~ x'

2. CO2 vs energy_pc

cor_energy <- cor(world$co2_pc, world$energy_pc)
cor_energy
## [1] 0.9273561
tabla_cor_energy <- tibble::tibble(
  variable_dependiente   = "co2_pc",
  variable_independiente = "energy_pc",
  correlacion_pearson    = cor_energy
)

tabla_cor_energy %>%
  gt::gt() %>%
  gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
  gt::tab_header(
    title = "Correlación entre CO₂ per cápita y uso de energía per cápita"
  )
Correlación entre CO₂ per cápita y uso de energía per cápita
variable_dependiente variable_independiente correlacion_pearson
co2_pc energy_pc 0.927
g_energy <- ggplot2::ggplot(world, ggplot2::aes(x = energy_pc, y = co2_pc)) +
  ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "E")) +
  ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
  ggplot2::scale_x_continuous(labels = scales::label_comma()) +
  ggplot2::scale_y_continuous(labels = scales::label_comma()) +
  ggplot2::labs(
    title = "CO₂ per cápita vs Uso de energía per cápita",
    x = "Uso de energía per cápita (kg de equivalente de petróleo)",
    y = "CO₂ per cápita (t CO₂e/cápita)"
  ) +
  ggplot2::theme_minimal(base_size = 14)

g_energy
## `geom_smooth()` using formula = 'y ~ x'

  1. CO2 vs urban_pop
cor_urban <- stats::cor(world$co2_pc, world$urban_pop)
cor_urban
## [1] 0.7071991
tabla_cor_urban <- dplyr::tibble(
  variable_dependiente   = "co2_pc",
  variable_independiente = "urban_pop",
  correlacion_pearson    = cor_urban
)

gt::gt(tabla_cor_urban) %>%
  gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
  gt::tab_header(
    title = "Correlación entre CO₂ per cápita y Población urbana"
  )
Correlación entre CO₂ per cápita y Población urbana
variable_dependiente variable_independiente correlacion_pearson
co2_pc urban_pop 0.707
g_urban <- ggplot2::ggplot(world, ggplot2::aes(x = urban_pop, y = co2_pc)) +
  ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "C")) +
  ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
  ggplot2::scale_x_continuous(labels = scales::label_comma()) +
  ggplot2::scale_y_continuous(labels = scales::label_comma()) +
  ggplot2::labs(
    title = "CO₂ per cápita vs Población urbana",
    x = "Población urbana (% del total)",
    y = "CO₂ per cápita (t CO₂e/cápita)"
  ) +
  ggplot2::theme_minimal(base_size = 14)

g_urban
## `geom_smooth()` using formula = 'y ~ x'

  1. CO2 vs Corrupción
cor_wgi_cc <- stats::cor(world$co2_pc, world$wgi_cc)
cor_wgi_cc
## [1] 0.5124914
tabla_cor_wgi_cc <- dplyr::tibble(
  variable_dependiente   = "co2_pc",
  variable_independiente = "wgi_cc",
  correlacion_pearson    = cor_wgi_cc
)

gt::gt(tabla_cor_wgi_cc) %>%
  gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
  gt::tab_header(
    title = "Correlación entre CO₂ per cápita y Control de la Corrupción"
  )
Correlación entre CO₂ per cápita y Control de la Corrupción
variable_dependiente variable_independiente correlacion_pearson
co2_pc wgi_cc 0.512
g_wgi_cc <- ggplot2::ggplot(world, ggplot2::aes(x = wgi_cc, y = co2_pc)) +
  ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "F")) +
  ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
  ggplot2::scale_x_continuous(labels = scales::label_comma()) +
  ggplot2::scale_y_continuous(labels = scales::label_comma()) +
  ggplot2::labs(
    title = "CO₂ per cápita vs Control de la Corrupción",
    x = "Control de la Corrupción (WGI_CC)",
    y = "CO₂ per cápita (t CO₂e/cápita)"
  ) +
  ggplot2::theme_minimal(base_size = 14)

g_wgi_cc
## `geom_smooth()` using formula = 'y ~ x'

  1. CO2 vs Efectividad de gobierno
cor_wgi_ge <- stats::cor(world$co2_pc, world$wgi_ge)
cor_wgi_ge
## [1] 0.5867898
tabla_cor_wgi_ge <- dplyr::tibble(
  variable_dependiente   = "co2_pc",
  variable_independiente = "wgi_ge",
  correlacion_pearson    = cor_wgi_ge
)

gt::gt(tabla_cor_wgi_ge) %>%
  gt::fmt_number(columns = correlacion_pearson, decimals = 3) %>%
  gt::tab_header(
    title = "Correlación entre CO₂ per cápita y Gobierno Efectivo"
  )
Correlación entre CO₂ per cápita y Gobierno Efectivo
variable_dependiente variable_independiente correlacion_pearson
co2_pc wgi_ge 0.587
g_wgi_ge <- ggplot2::ggplot(world, ggplot2::aes(x = wgi_ge, y = co2_pc)) +
  ggplot2::geom_point(alpha = 0.7, color = viridis::viridis(1, option = "A")) +
  ggplot2::geom_smooth(method = "lm", se = TRUE, color = "black") +
  ggplot2::scale_x_continuous(labels = scales::label_comma()) +
  ggplot2::scale_y_continuous(labels = scales::label_comma()) +
  ggplot2::labs(
    title = "CO₂ per cápita vs Gobierno Efectivo",
    x = "Gobierno Efectivo (WGI_GE)",
    y = "CO₂ per cápita (t CO₂e/cápita)"
  ) +
  ggplot2::theme_minimal(base_size = 14)

g_wgi_ge
## `geom_smooth()` using formula = 'y ~ x'

Regresión Gauss

## 
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge, data = world)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.09271 -0.53770 -0.02201  0.57872  2.60641 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.90395    0.09218   9.806  < 2e-16 ***
## wgi_cc      -0.36733    0.24717  -1.486     0.14    
## wgi_ge       1.14168    0.26157   4.365 2.53e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.037 on 133 degrees of freedom
## Multiple R-squared:  0.355,  Adjusted R-squared:  0.3453 
## F-statistic: 36.61 on 2 and 133 DF,  p-value: 2.159e-13
## 
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc, data = world)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4869 -0.4721 -0.0290  0.4744  1.3927 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.5960     0.6437 -13.355  < 2e-16 ***
## wgi_cc       -0.6359     0.1531  -4.153 5.86e-05 ***
## wgi_ge        0.1388     0.1746   0.795    0.428    
## PBI_pc        1.0727     0.0724  14.816  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.638 on 132 degrees of freedom
## Multiple R-squared:  0.7578, Adjusted R-squared:  0.7523 
## F-statistic: 137.7 on 3 and 132 DF,  p-value: < 2.2e-16
## 
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop, 
##     data = world)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39408 -0.46346 -0.02925  0.45980  1.29372 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.024233   0.712238 -11.266  < 2e-16 ***
## wgi_cc      -0.657881   0.152297  -4.320 3.06e-05 ***
## wgi_ge       0.214049   0.177992   1.203   0.2313    
## PBI_pc       0.953516   0.097449   9.785  < 2e-16 ***
## urban_pop    0.007488   0.004140   1.809   0.0728 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6326 on 131 degrees of freedom
## Multiple R-squared:  0.7637, Adjusted R-squared:  0.7565 
## F-statistic: 105.9 on 4 and 131 DF,  p-value: < 2.2e-16
modelo_4 <- stats::lm(
  formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + energy_pc,
  data    = world
)
summary(modelo_4)
## 
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + 
##     energy_pc, data = world)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3973 -0.2132  0.0440  0.2714  0.8557 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.753801   0.481486 -18.181  < 2e-16 ***
## wgi_cc      -0.463281   0.103363  -4.482  1.6e-05 ***
## wgi_ge       0.193093   0.119475   1.616 0.108479    
## PBI_pc       0.300214   0.083259   3.606 0.000442 ***
## urban_pop    0.004980   0.002786   1.787 0.076195 .  
## energy_pc    0.924533   0.072908  12.681  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4246 on 130 degrees of freedom
## Multiple R-squared:  0.8944, Adjusted R-squared:  0.8903 
## F-statistic: 220.1 on 5 and 130 DF,  p-value: < 2.2e-16
## 
## Call:
## stats::lm(formula = co2_pc ~ wgi_cc + PBI_pc + energy_pc, data = world)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3893 -0.2035  0.0242  0.2975  0.8131 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9.33192    0.40520 -23.031  < 2e-16 ***
## wgi_cc      -0.34506    0.06216  -5.551 1.50e-07 ***
## PBI_pc       0.39575    0.07100   5.574 1.35e-07 ***
## energy_pc    0.93353    0.07347  12.707  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4289 on 132 degrees of freedom
## Multiple R-squared:  0.8905, Adjusted R-squared:  0.8881 
## F-statistic:   358 on 3 and 132 DF,  p-value: < 2.2e-16

Elección de regresión

## Analysis of Variance Table
## 
## Model 1: co2_pc ~ wgi_cc + wgi_ge
## Model 2: co2_pc ~ wgi_cc + wgi_ge + PBI_pc
## Model 3: co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop
## Model 4: co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + energy_pc
## Model 5: co2_pc ~ wgi_cc + PBI_pc + energy_pc
##   Res.Df     RSS Df Sum of Sq        F    Pr(>F)    
## 1    133 143.097                                    
## 2    132  53.734  1    89.364 495.7087 < 2.2e-16 ***
## 3    131  52.425  1     1.309   7.2608  0.007978 ** 
## 4    130  23.436  1    28.989 160.8044 < 2.2e-16 ***
## 5    132  24.285 -2    -0.850   2.3565  0.098782 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostico del modelo 4

mod_final <- lm(co2_pc ~ wgi_cc + wgi_ge + PBI_pc + urban_pop + energy_pc, data = world)

Linealidad

plot(mod_final, 1,
     col = "#4B4B4B",
     pch = 19,
     cex = 0.8,
     main = "Evaluando Linealidad - Modelo 4")

Homocedasticidad

bp_mod4 <- bptest(mod_final)

data.frame(
  BP = bp_mod4$statistic,
  df = bp_mod4$parameter,
  p_value = bp_mod4$p.value
) %>%
  kable(caption = bp_mod4$method) %>%
  kable_styling(full_width = FALSE)
studentized Breusch-Pagan test
BP df p_value
BP 15.62326 5 0.0080062
plot(mod_final, 3,
     col = "blue",
     pch = 19,
     cex = 0.8,
     main = "Evaluando Homocedasticidad - Modelo 4")

Normalidad de residuos

sw_mod4 <- shapiro.test(mod_final$residuals)

data.frame(
  W = sw_mod4$statistic,
  p_value = sw_mod4$p.value
) %>%
  kable(caption = sw_mod4$method) %>%
  kable_styling(full_width = FALSE)
Shapiro-Wilk normality test
W p_value
W 0.9662635 0.0018936
plot(mod_final, 2,
     col = "#4B4B4B",
     pch = 19,
     cex = 0.8,
     main = "Evaluando Normalidad de Residuos - Modelo 4")

sw_mod4 <- shapiro.test(mod_final$residuals)

data.frame(
  W = sw_mod4$statistic,
  p_value = sw_mod4$p.value
) %>%
  kable(caption = sw_mod4$method) %>%
  kable_styling(full_width = FALSE)
Shapiro-Wilk normality test
W p_value
W 0.9662635 0.0018936

Multicolinealidad

vif_mod4 <- VIF(mod_final)

data.frame(VIF = vif_mod4) %>%
  kable(caption = "Evaluación de Multicolinealidad (VIF) - Modelo 4") %>%
  kable_styling(full_width = FALSE)
Evaluación de Multicolinealidad (VIF) - Modelo 4
VIF
wgi_cc 8.377627
wgi_ge 9.994299
PBI_pc 9.817995
urban_pop 2.631879
energy_pc 4.022312

Valores Influyentes

infl_mod4 <- as.data.frame(influence.measures(mod_final)$is.inf)

infl_mod4[infl_mod4$cook.d | infl_mod4$hat, c("cook.d","hat")] %>%
  kable(caption = "Valores Influyentes Críticos - Modelo 4") %>%
  kable_styling(full_width = FALSE)
Valores Influyentes Críticos - Modelo 4
cook.d hat
NA NA
:—— :—

Analisis factorial

corMatrix <- cor(theData[, efa_vars], use = "pairwise.complete.obs")

round(corMatrix, 2)
##           co2_pc PBI_pc energy_pc urban_pop wgi_cc wgi_ge
## co2_pc      1.00   0.83      0.93      0.71   0.51   0.59
## PBI_pc      0.83   1.00      0.86      0.76   0.80   0.83
## energy_pc   0.93   0.86      1.00      0.69   0.61   0.66
## urban_pop   0.71   0.76      0.69      1.00   0.54   0.53
## wgi_cc      0.51   0.80      0.61      0.54   1.00   0.94
## wgi_ge      0.59   0.83      0.66      0.53   0.94   1.00
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.81
## MSA for each item = 
##    co2_pc    PBI_pc energy_pc urban_pop    wgi_cc    wgi_ge 
##      0.76      0.89      0.82      0.88      0.74      0.77
cortest.bartlett(corMatrix, n = nrow(theData))
## $chisq
## [1] 1040.848
## 
## $p.value
## [1] 2.35711e-212
## 
## $df
## [1] 15
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData[, efa_vars],
            fa      = "fa",
            correct = TRUE,
            plot    = TRUE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA
resfa <- fa(theData[, efa_vars],
            nfactors = 2,
            cor      = "mixed",
            rotate   = "varimax",
            fm       = "minres")

print(resfa$loadings)
## 
## Loadings:
##           MR1   MR2  
## co2_pc    0.944 0.243
## PBI_pc    0.740 0.626
## energy_pc 0.868 0.364
## urban_pop 0.670 0.350
## wgi_cc    0.304 0.938
## wgi_ge    0.384 0.869
## 
##                  MR1   MR2
## SS loadings    2.881 2.341
## Proportion Var 0.480 0.390
## Cumulative Var 0.480 0.870
print(resfa$loadings, cutoff = 0.5)
## 
## Loadings:
##           MR1   MR2  
## co2_pc    0.944      
## PBI_pc    0.740 0.626
## energy_pc 0.868      
## urban_pop 0.670      
## wgi_cc          0.938
## wgi_ge          0.869
## 
##                  MR1   MR2
## SS loadings    2.881 2.341
## Proportion Var 0.480 0.390
## Cumulative Var 0.480 0.870
fa.diagram(resfa,
           main = "Resultados del EFA - Factores institucionales y energéticos")

sort(resfa$communality)
## urban_pop energy_pc    wgi_ge    PBI_pc    co2_pc    wgi_cc 
## 0.5713920 0.8864492 0.9024824 0.9391297 0.9495300 0.9728354
sort(resfa$complexity)
##    co2_pc    wgi_cc energy_pc    wgi_ge urban_pop    PBI_pc 
##  1.132319  1.207837  1.341759  1.377274  1.509383  1.946066
resfa$TLI   
## [1] 0.8733423
resfa$rms      
## [1] 0.01823503
resfa$RMSEA   
##      RMSEA      lower      upper confidence 
##  0.2509692  0.1829350  0.3276580  0.9000000
resfa$BIC
## [1] 18.64315
head(resfa$scores)
##             MR1        MR2
## [1,]  0.4738323 -1.0486713
## [2,] -0.9328441 -0.7087230
## [3,]  0.6088820  1.2910130
## [4,]  0.9989459 -1.0885909
## [5,] -2.0240202  0.3141251
## [6,] -0.6764434 -0.8682235
theData$Factor_1 <- resfa$scores[, 1]
theData$Factor_2 <- resfa$scores[, 2]

world <- world %>%
  left_join(
    theData[, c("idx_row", "Factor_1", "Factor_2")],
    by = "idx_row"
  )
world <- world %>%
  rename(
    F_institucional = Factor_1,
    F_energia_desarrollo = Factor_2
  )

Cluster

library(ggplot2)

ggplot(cluster_data,
       aes(x = F_institucional,
           y = F_energia_desarrollo,
           color = cluster_k3,
           label = country)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(
    x = "Factor institucional",
    y = "Factor energía y desarrollo",
    color = "Clúster k-means",
    title = "Agrupación de países según factores institucionales y de energía/desarrollo"
  )

cluster_resumen <- cluster_data %>%
  group_by(cluster_k3) %>%
  summarise(
    n_paises = n(),
    prom_F_institucional = mean(F_institucional),
    prom_F_energia_desarrollo = mean(F_energia_desarrollo),
    .groups = "drop"
)

cluster_resumen
## # A tibble: 3 × 4
##   cluster_k3 n_paises prom_F_institucional prom_F_energia_desarrollo
##   <fct>         <int>                <dbl>                     <dbl>
## 1 1                50                0.646                    -0.719
## 2 2                41                0.412                     1.21 
## 3 3                45               -1.09                     -0.299